Variables:
Risk Age Sex Country
library(data.table)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
data.table 1.14.8 using 1 threads (see ?getDTthreads). Latest news: r-datatable.com
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
**********
library(tidyr)
# ---- Wave 5: load, rename, subset, and label countries ----
# Read the Wave 5 export and coerce it to a plain data.frame.
WV5_data <- readRDS("/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/F00007944-WV5_Data_R_v20180912.rds")
WV5_data_df <- as.data.frame(WV5_data)
# Preview the first rows of the first five columns.
head(WV5_data_df[, 1:5])
library(dplyr)
# Replace the item codes with readable variable names.
WV5_data <- rename(
  WV5_data_df,
  sex = V235, age = V237, country = V2, wave = V1, risk = V86
)
WV5_data
# Keep only the variables used in the analysis.
WV5_data <- select(WV5_data, sex, age, country, wave, risk)
WV5_data
# Translate numeric country codes into names using the lookup file
# (first column = code, second column = name).
countrynames <- read.csv(
  "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countrynames.txt",
  header = FALSE, as.is = TRUE
)
names(countrynames) <- c("code", "name")
WV5_data$country_lab <- countrynames$name[match(WV5_data$country, countrynames$code)]
table(WV5_data$country_lab)
Andorra Argentina Australia Brazil Bulgaria Burkina Faso
1003 1002 1421 1500 1001 1534
Canada Chile China Colombia Cyprus (G) Egypt
2164 1000 1991 3025 1050 3051
Ethiopia Finland France Georgia Germany Ghana
1500 1014 1001 1500 2064 1534
Great Britain Guatemala Hong Kong Hungary India Indonesia
1041 1000 1252 1007 2001 2015
Iran Iraq Italy Japan Jordan Malaysia
2667 2701 1012 1096 1200 1201
Mali Mexico Moldova Morocco Netherlands New Zealand
1534 1560 1046 1200 1050 954
Norway Peru Poland Romania Russia Rwanda
1025 1500 1000 1776 2033 1507
Slovenia South Africa South Korea Spain Sweden Switzerland
1037 2988 1200 1200 1003 1241
Taiwan Thailand Trinidad and Tobago Turkey Ukraine United States
1227 1534 1002 1346 1000 1249
Uruguay Viet Nam Zambia
1000 1495 1500
WV5_data
NA
NA
# ---- Wave 6: load, rename, and keep the variables of interest ----
# load() restores the saved object `WV6_Data_R_v20201117` into the workspace
# and returns only the *names* of the restored objects, so it is called for
# its side effect instead of being assigned to WV6_data.
load("/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/WV6_Data_R_v20201117.rdata")
WV6_data <- WV6_Data_R_v20201117
print(WV6_data)
# Replace the item codes with readable variable names.
WV6_data <- WV6_data %>%
  rename(wave = V1, sex = V240, age = V242, country = V2, risk = V76)
# Keep only the variables of interest (each column listed once).
WV6_data <- WV6_data %>%
  select(wave, sex, age, country, risk)
WV6_data
NA
# Decode the Wave 6 country codes into country names.
countrynames <- read.csv(
  "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countrynames.txt",
  header = FALSE, as.is = TRUE
)
names(countrynames) <- c("code", "name")
WV6_data$country_lab <- countrynames$name[match(WV6_data$country, countrynames$code)]
table(WV6_data$country_lab)
Algeria Argentina Armenia Australia Azerbaijan Belarus
1200 1030 1100 1477 1002 1535
Brazil Chile China Colombia Cyprus (G) Ecuador
1486 1000 2300 1512 1000 1202
Egypt Estonia Georgia Germany Ghana Haiti
1523 1533 1202 2046 1552 1996
Hong Kong India Iraq Japan Jordan Kazakhstan
1000 4078 1200 2443 1200 1500
Kuwait Kyrgyzstan Lebanon Libya Malaysia Mexico
1303 1500 1200 2131 1300 2000
Morocco Netherlands New Zealand Nigeria Pakistan Palestine
1200 1902 841 1759 1200 1000
Peru Philippines Poland Qatar Romania Russia
1210 1200 966 1060 1503 2500
Rwanda Singapore Slovenia South Africa South Korea Spain
1527 1972 1069 3531 1200 1189
Sweden Taiwan Thailand Trinidad and Tobago Tunisia Turkey
1206 1238 1200 999 1205 1605
Ukraine United States Uruguay Uzbekistan Yemen Zimbabwe
1500 2232 1000 1500 1000 1500
WV6_data
# Stack the Wave 5 and Wave 6 respondents into one data frame.
print(WV5_data)
print(WV6_data)
data <- rbind(WV5_data, WV6_data)
print(data)
# How many distinct country labels does the pooled sample contain?
length(unique(data$country_lab))
[1] 80
# Exclude respondents whose age, sex, or risk code is not positive, for the
# pooled sample and for each wave separately.
data <- subset(data, age > 0 & sex > 0 & risk > 0)
data_Wave5 <- subset(WV5_data, age > 0 & sex > 0 & risk > 0)
data_Wave6 <- subset(WV6_data, age > 0 & sex > 0 & risk > 0)
# Drop any remaining rows that contain an NA in any column.
data <- na.omit(data)
data_Wave5 <- na.omit(data_Wave5)
data_Wave6 <- na.omit(data_Wave6)
# Number of participants in the pooled sample.
nrow(data)
[1] 156528
nrow(data_Wave5)
[1] 70308
nrow(data_Wave6)
[1] 86220
library(dplyr)
# Number of respondents per country label in the pooled data.
country_counts <- count(data, country_lab)
print(country_counts)
NA
#number of participants per country, listed per wave and for both waves combined
table(data_Wave5$country_lab)
Andorra Argentina Australia Brazil Bulgaria Burkina Faso Canada Chile
1001 981 1381 1492 942 1332 2121 969
China Cyprus (G) Egypt Ethiopia Finland France Georgia Germany
1898 1042 3026 1481 1013 995 1451 2019
Ghana Great Britain Hungary India Indonesia Iran Japan Jordan
1513 1036 1003 1575 1942 2615 1032 1163
Malaysia Mali Mexico Moldova Morocco Netherlands Norway Peru
1200 1312 1505 1028 1145 1046 1019 1430
Poland Romania Russia Rwanda Slovenia South Africa South Korea Spain
989 1583 1970 1409 1008 2945 1200 1184
Sweden Switzerland Taiwan Thailand Trinidad and Tobago Turkey Ukraine United States
997 1233 1225 1514 997 1303 967 1219
Uruguay Viet Nam Zambia
989 1416 1452
length(unique(data_Wave5$country_lab))
[1] 51
table(data_Wave6$country_lab)
Algeria Argentina Armenia Australia Azerbaijan Belarus Brazil Chile
1115 1011 1090 1441 1002 1528 1481 914
China Colombia Cyprus (G) Ecuador Egypt Estonia Georgia Germany
2167 1506 993 1201 1523 1509 1190 2024
Ghana Haiti Hong Kong India Iraq Japan Jordan Kazakhstan
1552 1976 977 3472 1187 2201 1195 1500
Kuwait Kyrgyzstan Lebanon Libya Malaysia Mexico Morocco Netherlands
1190 1497 1177 2043 1300 1996 1035 1813
New Zealand Nigeria Pakistan Palestine Peru Philippines Poland Qatar
802 1759 1176 974 1158 1199 950 1052
Romania Russia Rwanda Singapore Slovenia South Africa South Korea Spain
1436 1806 1527 1938 1051 3481 1182 1173
Sweden Taiwan Thailand Trinidad and Tobago Tunisia Turkey Ukraine United States
1200 1195 1160 983 1097 1573 1500 2189
Uruguay Uzbekistan Yemen Zimbabwe
991 1433 929 1500
length(unique(data_Wave6$country_lab))
[1] 60
length(unique(data$country_lab))
[1] 77
table(data$country_lab)
Algeria Andorra Argentina Armenia Australia Azerbaijan Belarus Brazil
1115 1001 1992 1090 2822 1002 1528 2973
Bulgaria Burkina Faso Canada Chile China Colombia Cyprus (G) Ecuador
942 1332 2121 1883 4065 1506 2035 1201
Egypt Estonia Ethiopia Finland France Georgia Germany Ghana
4549 1509 1481 1013 995 2641 4043 3065
Great Britain Haiti Hong Kong Hungary India Indonesia Iran Iraq
1036 1976 977 1003 5047 1942 2615 1187
Japan Jordan Kazakhstan Kuwait Kyrgyzstan Lebanon Libya Malaysia
3233 2358 1500 1190 1497 1177 2043 2500
Mali Mexico Moldova Morocco Netherlands New Zealand Nigeria Norway
1312 3501 1028 2180 2859 802 1759 1019
Pakistan Palestine Peru Philippines Poland Qatar Romania Russia
1176 974 2588 1199 1939 1052 3019 3776
Rwanda Singapore Slovenia South Africa South Korea Spain Sweden Switzerland
2936 1938 2059 6426 2382 2357 2197 1233
Taiwan Thailand Trinidad and Tobago Tunisia Turkey Ukraine United States Uruguay
2420 2674 1980 1097 2876 2467 3408 1980
Uzbekistan Viet Nam Yemen Zambia Zimbabwe
1433 1416 929 1452 1500
# Reverse-code the risk item so that high values represent more risk taking
# (x -> 7 - x, i.e. the original `6 - x + 1`).
data$risk <- 7 - data$risk
# Keep a copy of the reversed item on its original ordinal scale.
data$risk_ord <- data$risk
# Transform the risk variable into a T-score (mean = 50, sd = 10).
# scale() returns a one-column matrix; as.numeric() stores a plain numeric
# vector so the column is not carried around as a matrix (which is what made
# summary(data) report it as `risk.V1`).
data$risk <- as.numeric(10 * scale(data$risk, center = TRUE, scale = TRUE) + 50)
data
NA
# Sex counts (1 = male, 2 = female): pooled sample, then each wave.
table(data$sex)
table(data_Wave5$sex)
table(data_Wave6$sex)
# Categorical age variable: under 20, ten-year bands from 20 to 79, and 80+.
# Intervals are closed on the left, e.g. [20, 30).
data$agecat <- as.character(cut(
  data$age,
  breaks = c(-Inf, 20, 30, 40, 50, 60, 70, 80, Inf),
  labels = c("15-19", "20-29", "30-39", "40-49",
             "50-59", "60-69", "70-79", "80+"),
  right = FALSE
))
# Replace the numeric sex codes with text labels in all three data frames.
data$sex <- replace(data$sex, data$sex == 1, "male")
data$sex <- replace(data$sex, data$sex == 2, "female")
data_Wave5$sex <- replace(data_Wave5$sex, data_Wave5$sex == 1, "male")
data_Wave5$sex <- replace(data_Wave5$sex, data_Wave5$sex == 2, "female")
data_Wave6$sex <- replace(data_Wave6$sex, data_Wave6$sex == 1, "male")
data_Wave6$sex <- replace(data_Wave6$sex, data_Wave6$sex == 2, "female")
# Average age of participants.
mean(data$age)
[1] 41.62343
median(data$age)
[1] 39
# Replace the numeric wave codes with text labels.
data$wave <- replace(data$wave, data$wave == 5, "Wave 5")
data$wave <- replace(data$wave, data$wave == 6, "Wave 6")
print(data)
# Youngest and oldest respondent: pooled sample, then each wave.
range(data$age)
range(data_Wave5$age)
range(data_Wave6$age)
# ---- Distribution plots ----
library(ggplot2)
# Histogram of the risk-taking score.
ggplot(data, aes(x = risk)) +
  geom_histogram(binwidth = 0.5, fill = "lightblue", color = "black") +
  labs(x = "Risk Taking", y = "Frequency", title = "Histogram of Risk Taking") +
  theme_minimal()
# Histogram of age. Age is recorded in whole years, so one-year bins are
# used; half-year bins leave every second bin empty.
ggplot(data, aes(x = age)) +
  geom_histogram(binwidth = 1, fill = "lightblue", color = "black") +
  labs(x = "Age", y = "Frequency", title = "Histogram of Age Distribution") +
  theme_minimal()
# Risk taking by age category.
ggplot(data, aes(x = agecat, y = risk)) +
  geom_boxplot() +
  labs(title = "Boxplot of Risk and Adventure by Age",
       x = "Age",
       y = "Risk and Adventure") +
  theme_minimal()
# Risk taking by sex.
ggplot(data, aes(as.factor(sex), risk)) +
  geom_boxplot()
# Descriptive summary before and after dropping rows that contain any NA.
summary(data)
data <- na.omit(data)
summary(data)
sex age country wave risk.V1 country_lab risk_ord agecat
Length:156528 Min. : 15.00 Min. : 12.0 Min. :5.000 Min. :36.15574 Length:156528 Min. :1.000 Length:156528
Class :character 1st Qu.: 28.00 1st Qu.:276.0 1st Qu.:5.000 1st Qu.:42.42763 Class :character 1st Qu.:2.000 Class :character
Mode :character Median : 39.00 Median :466.0 Median :6.000 Median :48.69953 Mode :character Median :3.000 Mode :character
Mean : 41.62 Mean :477.4 Mean :5.551 Mean :50.00000 Mean :3.207
3rd Qu.: 54.00 3rd Qu.:710.0 3rd Qu.:6.000 3rd Qu.:54.97142 3rd Qu.:4.000
Max. :102.00 Max. :894.0 Max. :6.000 Max. :67.51521 Max. :6.000
# Risk distribution in Wave 5 vs. Wave 6.
ggplot(data = data, mapping = aes(x = as.factor(wave), y = risk)) +
  geom_boxplot()
# Risk (x-axis) against age (y-axis) with a linear fit.
library(ggplot2)
ggplot(data = data, mapping = aes(x = risk, y = age)) +
  geom_point() +
  geom_smooth(method = "lm")
# Three selected countries: Andorra, Romania, Spain.
data1 <- data[data$country_lab %in% c("Andorra", "Romania", "Spain"), , drop = FALSE]
# Risk distribution for the three countries.
ggplot(data = data1, mapping = aes(x = as.factor(country_lab), y = risk)) +
  geom_boxplot()
# Age vs. risk, coloured by country, with one linear fit per country.
ggplot(data = data, mapping = aes(x = age, y = risk, color = as.factor(country_lab))) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE)
# Country-level hardship indicators, collected by hand from the CIA factbook,
# WHO, and World Bank (see Supplemental Materials for URL sources).
countryfacts <- read.csv(
  "/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countryfacts_selection.csv",
  header = TRUE, as.is = TRUE
)
# Candidate column labels. This vector is only defined here and is not
# applied to `countryfacts` in this step.
# NOTE(review): presumably meant as colnames(countryfacts) -- confirm that the
# length and order match the CSV before assigning.
labels <- c(
  "code", "country", "codeWVS", "Homicide", "GDP",
  "InfMort", "LifeExp", "GINI", "GenderPEdu", "code2"
)
print(countryfacts)
# ---- Descriptive summaries by age group, country, and sex ----
# dplyr is attached once. The repeated
# `if (!require(dplyr)) install.packages("dplyr")` guards were removed:
# require() hides a missing package behind FALSE, and installing packages as
# a side effect of running an analysis script is not reproducible.
library(dplyr)

# Respondents per age category.
age_counts <- data %>%
  count(agecat)
print(age_counts)
summary(data)

# Respondents per age category within each country. Counting on both
# variables directly returns an ungrouped result.
agepercountries_counts <- data %>%
  count(country_lab, agecat)
print(agepercountries_counts)

# Respondents per sex within each country.
sexpercountries_counts <- data %>%
  count(country_lab, sex)
print(sexpercountries_counts)

# Age distribution (mean, median, min, max) per country.
age_distribution_per_country <- data %>%
  group_by(country_lab) %>%
  summarize(
    mean_age = mean(age, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    min_age = min(age, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE)
  )
print(age_distribution_per_country)

# Mean ordinal risk score per sex.
risk_by_sex <- data %>%
  group_by(sex) %>%
  summarize(mean_risk = mean(risk_ord, na.rm = TRUE))
# Difference between consecutive rows of risk_by_sex (second row minus first).
sex_difference <- diff(risk_by_sex$mean_risk)
print(risk_by_sex)
print(sex_difference)

# Mean ordinal risk score per country.
risk_by_country_lab <- data %>%
  group_by(country_lab) %>%
  summarize(mean_risk = mean(risk_ord, na.rm = TRUE))
print(risk_by_country_lab)

# Mean ordinal risk score per age category.
risk_by_agecat <- data %>%
  group_by(agecat) %>%
  summarize(mean_risk = mean(risk_ord, na.rm = TRUE))
print(risk_by_agecat)
NA
data
# Create the 'hardship' column in the 'countryfacts' data frame as the
# unweighted mean of six country-level indicators.
# `countryfacts` must already exist in the session (it is read from
# countryfacts_selection.csv); running this step first fails with
# "object 'countryfacts' not found".
# NOTE(review): the six indicators are averaged on their raw scales, and some
# presumably point in opposite directions (e.g. higher GDP or life expectancy
# vs. higher homicide rate) -- confirm whether they should be standardised
# and/or reversed before averaging.
library(dplyr)
countryfacts <- countryfacts %>%
  mutate(
    hardship = (homiciderate + gdp + infantmortality +
      lifeexpectancy + gini + femalemale_primedu) / 6
  )
Error in mutate(., hardship = (homiciderate + gdp + infantmortality + :
object 'countryfacts' not found
countryfacts
# Algeria: risk against standardised age, split by sex.
# Relies on the `z_age` column, which is added to `data` by
# `data$z_age <- scale(data$age)` elsewhere in this file.
dataAlgeria <- data[data$country_lab %in% "Algeria", , drop = FALSE]
library(ggplot2)
pAlgeria <- ggplot(
  data = dataAlgeria,
  mapping = aes(x = z_age, y = risk, color = as.factor(sex))
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Scatterplot of risk_ord vs. age in Algeria", x = "Age", y = "Risk")
print(pAlgeria)
# Linear model with an age-by-sex interaction.
modelAlgeria <- lm(risk ~ z_age * factor(sex), data = dataAlgeria)
summary(modelAlgeria)
Call:
lm(formula = risk ~ z_age * factor(sex), data = dataAlgeria)
Residuals:
Min 1Q Median 3Q Max
-21.8218 -9.5573 0.5058 10.2124 24.3816
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 50.0078 0.4995 100.122 < 2e-16 ***
z_age -5.5711 0.5291 -10.530 < 2e-16 ***
factor(sex)male 2.0438 0.6951 2.940 0.003345 **
z_age:factor(sex)male 2.6450 0.7462 3.545 0.000409 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 11.1 on 1111 degrees of freedom
Multiple R-squared: 0.1145, Adjusted R-squared: 0.1121
F-statistic: 47.88 on 3 and 1111 DF, p-value: < 2.2e-16
# Pull the fitted coefficients out of the Algeria model by name.
coefficients <- coef(modelAlgeria)
intercept_Algeria <- coefficients["(Intercept)"]
slope_age_Algeria <- coefficients["z_age"]
slope_gender_Algeria <- coefficients["factor(sex)male"]
# Show the intercept.
print(intercept_Algeria)
(Intercept)
50.00784
print(slope_age_Algeria)
z_age
-5.571119
print(slope_gender_Algeria)
factor(sex)male
2.043817
dataAlgeria
# Andorra: ordinal risk item against age standardised within the Andorra
# subset, split by sex.
# NOTE(review): unlike the other country models this one uses scale(age) and
# risk_ord rather than z_age and risk -- confirm that this is intended.
dataAndorra <- data[data$country_lab %in% "Andorra", , drop = FALSE]
library(ggplot2)
PAndorra <- ggplot(
  data = dataAndorra,
  mapping = aes(x = scale(age), y = risk_ord, color = as.factor(sex))
) +
  geom_point() +
  geom_smooth(method = "lm")
print(PAndorra)
modelAndorra <- lm(risk_ord ~ scale(age) * factor(sex), data = dataAndorra)
print(modelAndorra)
Call:
lm(formula = risk_ord ~ scale(age) * factor(sex), data = dataAndorra)
Coefficients:
(Intercept) scale(age) factor(sex)male scale(age):factor(sex)male
3.2579 -0.2608 0.4797 -0.1088
# Pull the fitted coefficients out of the Andorra model by name.
coefficients <- coef(modelAndorra)
intercept_Andorra <- coefficients["(Intercept)"]
slope_age_Andorra <- coefficients["scale(age)"]
slope_gender_Andorra <- coefficients["factor(sex)male"]
# Show the intercept.
print(intercept_Andorra)
(Intercept)
3.257907
print(slope_age_Andorra)
scale(age)
-0.2608296
print(slope_gender_Andorra)
factor(sex)male
0.4797491
library(dplyr)
# Per-country sample description: size, share of women, mean age, age range,
# and mean risk score.
table_data <- data %>%
  group_by(country_lab) %>%
  summarise(
    n = n(),
    # `sex` holds the labels "male"/"female" at this point (the numeric codes
    # 1/2 were recoded earlier), so the old comparison `sex == 2` never
    # matched and reported 0 % women for every country.
    female_percentage = mean(sex == "female") * 100,
    mean_age = mean(age, na.rm = TRUE),
    age_range = paste(min(age, na.rm = TRUE), "-", max(age, na.rm = TRUE)),
    mean_risk = mean(risk, na.rm = TRUE)
  )
table_data
NA
NA
NA
NA
NA
#Argentina
print(intercept_Argentina)
(Intercept)
48.06712
print(slope_age_Argentina)
z_age
-1.99775
print(slope_gender_Argentina)
factor(sex)male
2.708319
# Armenia: ordinal risk item against raw age, split by sex.
dataArmenia <- data[data$country_lab %in% "Armenia", , drop = FALSE]
library(ggplot2)
pArmenia <- ggplot(
  data = dataArmenia,
  mapping = aes(x = age, y = risk_ord, color = as.factor(sex))
) +
  geom_point() +
  geom_smooth(method = "lm")
print(pArmenia)
# Australia: risk score against standardised age, split by sex, followed by
# an additive age + sex linear model.
dataAustralia <- data[data$country_lab %in% "Australia", , drop = FALSE]
library(ggplot2)
pAustralia <- ggplot(
  data = dataAustralia,
  mapping = aes(x = z_age, y = risk, color = as.factor(sex))
) +
  geom_point() +
  geom_smooth(method = "lm")
print(pAustralia)
ModelAustralia <- lm(risk ~ z_age + factor(sex), data = dataAustralia)
print(ModelAustralia)
Call:
lm(formula = risk ~ z_age + factor(sex), data = dataAustralia)
Coefficients:
(Intercept) z_age factor(sex)male
48.005 -2.650 3.432
# Pull the fitted coefficients out of the Australia model by name.
coefficients <- coef(ModelAustralia)
intercept_Australia <- coefficients["(Intercept)"]
slope_age_Australia <- coefficients["z_age"]
slope_gender_Australia <- coefficients["factor(sex)male"]
# Show the intercept.
print(intercept_Australia)
(Intercept)
48.00453
print(slope_age_Australia)
z_age
-2.650168
print(slope_gender_Australia)
factor(sex)male
3.432388
# Azerbaijan: ordinal risk item against raw age with a linear fit.
# The subset previously filtered on "Australia" (copy-paste slip), so the
# "Azerbaijan" plot was showing Australian respondents.
dataAzerbaijan <- subset(data, country_lab %in% c("Azerbaijan"))
library(ggplot2)
pAzerbaijan <- ggplot(dataAzerbaijan, aes(age, risk_ord)) +
  geom_point() +
  geom_smooth(method = "lm")
pAzerbaijan
# Belarus: ordinal risk item against raw age with a linear fit.
dataBelarus <- data[data$country_lab %in% "Belarus", , drop = FALSE]
library(ggplot2)
pBelarus <- ggplot(data = dataBelarus, mapping = aes(x = age, y = risk_ord)) +
  geom_point() +
  geom_smooth(method = "lm")
print(pBelarus)
# Brazil: risk score against standardised age, split by sex, followed by an
# additive age + sex linear model.
dataBrazil <- data[data$country_lab %in% "Brazil", , drop = FALSE]
library(ggplot2)
pBrazil <- ggplot(
  data = dataBrazil,
  mapping = aes(x = z_age, y = risk, color = as.factor(sex))
) +
  geom_point() +
  geom_smooth(method = "lm")
print(pBrazil)
ModelBrazil <- lm(risk ~ z_age + factor(sex), data = dataBrazil)
print(ModelBrazil)
Call:
lm(formula = risk ~ z_age + factor(sex), data = dataBrazil)
Coefficients:
(Intercept) z_age factor(sex)male
45.239 -1.740 2.429
# Pull the fitted coefficients out of the Brazil model by name.
coefficients <- coef(ModelBrazil)
intercept_Brazil <- coefficients["(Intercept)"]
slope_age_Brazil <- coefficients["z_age"]
slope_gender_Brazil <- coefficients["factor(sex)male"]
# Show the intercept.
print(intercept_Brazil)
(Intercept)
45.23868
print(slope_age_Brazil)
z_age
-1.740097
print(slope_gender_Brazil)
factor(sex)male
2.429132
# Bulgaria: ordinal risk item against raw age with a linear fit.
dataBulgaria <- data[data$country_lab %in% "Bulgaria", , drop = FALSE]
library(ggplot2)
pBulgaria <- ggplot(data = dataBulgaria, mapping = aes(x = age, y = risk_ord)) +
  geom_point() +
  geom_smooth(method = "lm")
print(pBulgaria)
# Burkina Faso: ordinal risk item against raw age with a linear fit.
dataBurkinaFaso <- data[data$country_lab %in% "Burkina Faso", , drop = FALSE]
library(ggplot2)
pBurkinaFaso <- ggplot(data = dataBurkinaFaso, mapping = aes(x = age, y = risk_ord)) +
  geom_point() +
  geom_smooth(method = "lm")
print(pBurkinaFaso)
# Canada: risk score against standardised age, split by sex, followed by an
# additive age + sex linear model.
dataCanada <- data[data$country_lab %in% "Canada", , drop = FALSE]
library(ggplot2)
pCanada <- ggplot(
  data = dataCanada,
  mapping = aes(x = z_age, y = risk, color = as.factor(sex))
) +
  geom_point() +
  geom_smooth(method = "lm")
print(pCanada)
ModelCanada <- lm(risk ~ z_age + factor(sex), data = dataCanada)
print(ModelCanada)
Call:
lm(formula = risk ~ z_age + factor(sex), data = dataCanada)
Coefficients:
(Intercept) z_age factor(sex)male
48.682 -2.568 3.613
# Pull the fitted coefficients out of the Canada model by name.
coefficients <- coef(ModelCanada)
intercept_Canada <- coefficients["(Intercept)"]
slope_age_Canada <- coefficients["z_age"]
slope_gender_Canada <- coefficients["factor(sex)male"]
# Show the intercept.
print(intercept_Canada)
(Intercept)
48.68201
print(slope_age_Canada)
z_age
-2.568204
print(slope_gender_Canada)
factor(sex)male
3.61295
# Chile: ordinal risk item against raw age with a linear fit.
dataChile <- data[data$country_lab %in% "Chile", , drop = FALSE]
library(ggplot2)
pChile <- ggplot(data = dataChile, mapping = aes(x = age, y = risk_ord)) +
  geom_point() +
  geom_smooth(method = "lm")
print(pChile)
# China: ordinal risk item against raw age with a linear fit.
dataChina <- data[data$country_lab %in% "China", , drop = FALSE]
library(ggplot2)
pChina <- ggplot(data = dataChina, mapping = aes(x = age, y = risk_ord)) +
  geom_point() +
  geom_smooth(method = "lm")
print(pChina)
# Colombia: risk score against standardised age, split by sex, followed by an
# additive age + sex linear model.
# The label was previously written "Colombia " (trailing space), which
# matched no rows; the empty subset is what made lm() fail with
# "contrasts can be applied only to factors with 2 or more levels".
dataColombia <- subset(data, country_lab %in% c("Colombia"))
library(ggplot2)
pColombia <- ggplot(dataColombia, aes(z_age, risk, color = as.factor(sex))) +
  geom_point() +
  geom_smooth(method = "lm")
pColombia
# factor(sex) is used (rather than as.factor(sex)) so the sex coefficient is
# named like in the Australia, Brazil, and Canada models.
ModelColombia <- lm(risk ~ z_age + factor(sex), data = dataColombia)
Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
contrasts can be applied only to factors with 2 or more levels
# ---- Age vs. ordinal risk scatterplots, one per country ----
# The same subset / plot / print stanza was repeated verbatim for every
# country below. It is now written once and applied in a loop.
library(ggplot2)

# Scatterplot of risk_ord against age with a linear fit for one country's rows.
plot_age_risk <- function(country_rows) {
  ggplot(country_rows, aes(age, risk_ord)) +
    geom_point() +
    geom_smooth(method = "lm")
}

# Name = suffix of the objects to create (data<suffix>, p<suffix>);
# value = label to match in data$country_lab.
# NOTE(review): "Cyprus (G)" and "Moldova" appear in table(data$country_lab)
# but have no entry here -- confirm whether they were left out on purpose.
country_plot_labels <- c(
  Ecuador = "Ecuador", Egypt = "Egypt", Estonia = "Estonia",
  Ethiopia = "Ethiopia", Finland = "Finland", France = "France",
  Georgia = "Georgia", Germany = "Germany", Ghana = "Ghana",
  UK = "Great Britain", Haiti = "Haiti", HongKong = "Hong Kong",
  Hungary = "Hungary", India = "India", Indonesia = "Indonesia",
  Iran = "Iran", Iraq = "Iraq", Japan = "Japan", Jordan = "Jordan",
  Kazakhstan = "Kazakhstan", Kuwait = "Kuwait", Kyrgyzstan = "Kyrgyzstan",
  Lebanon = "Lebanon", Libya = "Libya", Malaysia = "Malaysia",
  Mali = "Mali", Mexico = "Mexico", Morocco = "Morocco",
  Netherlands = "Netherlands", NewZealand = "New Zealand",
  Nigeria = "Nigeria", Norway = "Norway", Pakistan = "Pakistan",
  Palestine = "Palestine", Peru = "Peru", Philippines = "Philippines",
  Poland = "Poland", Qatar = "Qatar", Romania = "Romania",
  Russia = "Russia", Rwanda = "Rwanda", Singapore = "Singapore",
  Slovenia = "Slovenia", SouthAfrica = "South Africa",
  SouthKorea = "South Korea", Spain = "Spain", Sweden = "Sweden",
  Switzerland = "Switzerland", Taiwan = "Taiwan", Thailand = "Thailand",
  Trinidad = "Trinidad and Tobago", Tunisia = "Tunisia", Turkey = "Turkey",
  Ukraine = "Ukraine", US = "United States", Uruguay = "Uruguay",
  Uzbekistan = "Uzbekistan", VietNam = "Viet Nam", Yemen = "Yemen",
  Zambia = "Zambia", Zimbabwe = "Zimbabwe"
)

for (suffix in names(country_plot_labels)) {
  country_rows <- data[
    data$country_lab %in% country_plot_labels[[suffix]], , drop = FALSE
  ]
  country_plot <- plot_age_risk(country_rows)
  # Keep the per-country objects (e.g. dataEcuador, pEcuador) available under
  # the names the original stanzas created.
  assign(paste0("data", suffix), country_rows)
  assign(paste0("p", suffix), country_plot)
  # Plots are not auto-printed inside a loop, so print explicitly.
  print(country_plot)
}
# Per-country means of age and of the ordinal risk item. The columns used
# are the lower-case `age` and `risk_ord`.
means_data <- data %>%
  group_by(country_lab) %>%
  summarise(Mean_Age = mean(age), Mean_Risk = mean(risk_ord))
print(means_data)
NA
countryfacts
NA
# GDP per country: keep the label, code, and gdp columns of countryfacts.
# (A stray leading comma used to pass an empty first argument to select().)
gdp_data <- countryfacts %>%
  select(label, code, gdp)
gdp_data
NA
library(ggplot2)
# Standardise age across the pooled sample. scale() returns a one-column
# matrix; as.numeric() stores it as a plain numeric column.
data$z_age <- as.numeric(scale(data$age))
# Pooled scatterplot of risk against standardised age, coloured by sex.
# factor(sex) orders its levels alphabetically ("female", "male"), so unnamed
# colours and labels were matched in that order and the legend showed the
# female points as "Male". Naming the values and fixing the breaks ties each
# colour and label to the intended level.
ggplot(data, aes(z_age, risk, color = factor(sex))) +
  geom_point(size = 0.1) +
  geom_smooth(method = "lm") +
  scale_color_manual(
    values = c(male = "blue", female = "red"),
    breaks = c("male", "female"),
    labels = c("Male", "Female")
  ) +
  labs(color = "Gender", x = "Age", y = "Risk Taking") +
  theme_minimal()
# Pooled linear model with an age-by-sex interaction.
Model1 <- lm(risk ~ scale(age) * factor(sex), data = data)
Model1
Call:
lm(formula = risk ~ scale(age) * factor(sex), data = data)
Coefficients:
(Intercept) scale(age) factor(sex)male scale(age):factor(sex)male
3.01844 -0.37779 0.39339 -0.02303
data
# Per-country sample description: size, share of women, mean age, age range,
# and mean risk score.
table_data <- data %>%
  group_by(country_lab) %>%
  summarize(
    n = n(),
    # `sex` holds the labels "male"/"female" at this point, and code 1 meant
    # male, so the old comparison `sex == 1` could not measure the female
    # share.
    female_percentage = mean(sex == "female") * 100,
    mean_age = mean(age, na.rm = TRUE),
    age_range = paste(min(age, na.rm = TRUE), "-", max(age, na.rm = TRUE)),
    mean_risk = mean(risk, na.rm = TRUE)
  )
table_data
NA
NA
NA
NA
NA
NA
# Per-country regression of risk on age and sex: one row per country with
# the intercept and the two slopes.
# group_modify() replaces the superseded do(); coefficients are read from
# coef(fit) directly instead of from the summary table.
# NOTE(review): this uses raw `age`, whereas the per-country models above use
# `z_age` -- confirm which scale the exported slopes should be on.
regression_results <- data %>%
  group_by(country_lab) %>%
  group_modify(function(country_rows, key) {
    est <- coef(lm(risk ~ age + sex, data = country_rows))
    data.frame(
      intercept = est[[1]],
      slope_age = est[[2]],
      slope_gender = est[[3]]
    )
  }) %>%
  ungroup() %>%
  rename(country = country_lab)
regression_results
write.csv(regression_results, "/Users/cristinacandido/Documents/Github/risk_wvs/regression_results.csv", row.names = FALSE)